// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: powerpc-registered-target

// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s
// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_MM_MALLOC -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s

#include <pmmintrin.h>

__m128d resd, md1, md2;
__m128 res, m1, m2;
__m128i resi, mi;
double *d;

void __attribute__((noinline))
test_pmmintrin() {
  resd = _mm_addsub_pd(md1, md2);
  res = _mm_addsub_ps(m1, m2);
  resd = _mm_hadd_pd(md1, md2);
  res = _mm_hadd_ps(m1, m2);
  resd = _mm_hsub_pd(md1, md2);
  res = _mm_hsub_ps(m1, m2);
  resi = _mm_lddqu_si128(&mi);
  resd = _mm_loaddup_pd(d);
  resd = _mm_movedup_pd(md1);
  res = _mm_movehdup_ps(m1);
  res = _mm_moveldup_ps(m1);
}

// CHECK-LABEL: @test_pmmintrin

// CHECK: define available_externally <2 x double> @_mm_addsub_pd(<2 x double> [[REG1:[0-9a-zA-Z_%.]+]], <2 x double> [[REG2:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG1]], <2 x double>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG2]], <2 x double>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> <double -0.000000e+00, double 0.000000e+00>, <2 x double>* [[REG5:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG4]], align 16
// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_xor(double vector[2], double vector[2])(<2 x double> [[REG7]], <2 x double> <double -0.000000e+00, double 0.000000e+00>)
// CHECK-NEXT: store <2 x double> [[REG8]], <2 x double>* [[REG6:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG3]], align 16
// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG6]], align 16
// CHECK-NEXT: [[REG11:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_add(double vector[2], double vector[2])(<2 x double> [[REG9]], <2 x double> [[REG10]])
// CHECK-NEXT: ret <2 x double> [[REG11]]

// CHECK: define available_externally <4 x float> @_mm_addsub_ps(<4 x float> [[REG12:[0-9a-zA-Z_%.]+]], <4 x float> [[REG13:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG12]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG15:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x float> <float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00>, <4 x float>* [[REG16:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG15]], align 16
// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG18]], <4 x float> <float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00>)
// CHECK-NEXT: store <4 x float> [[REG19]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_add(float vector[4], float vector[4])(<4 x float> [[REG20]], <4 x float> [[REG21]])
// CHECK-NEXT: ret <4 x float> [[REG22]]

// CHECK: define available_externally <2 x double> @_mm_hadd_pd(<2 x double> [[REG23:[0-9a-zA-Z_%.]+]], <2 x double> [[REG24:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG23]], <2 x double>* [[REG25:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG24]], <2 x double>* [[REG26:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG25]], align 16
// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG26]], align 16
// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])(<2 x double> [[REG27]], <2 x double> [[REG28]])
// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG25]], align 16
// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG26]], align 16
// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> [[REG30]], <2 x double> [[REG31]])
// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_add(double vector[2], double vector[2])(<2 x double> [[REG29]], <2 x double> [[REG32]])
// CHECK-NEXT: ret <2 x double> [[REG33]]

// CHECK: define available_externally <4 x float> @_mm_hadd_ps(<4 x float> [[REG34:[0-9a-zA-Z_%.]+]], <4 x float> [[REG35:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG34]], <4 x float>* [[REG36:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x float> [[REG35]], <4 x float>* [[REG37:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG38:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG39:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG36]], align 16
// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG37]], align 16
// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG38:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG40]], <4 x float> [[REG41]], <16 x i8> [[REG42]])
// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG36]], align 16
// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG37]], align 16
// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG39:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG44]], <4 x float> [[REG45]], <16 x i8> [[REG46]])
// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_add(float vector[4], float vector[4])(<4 x float> [[REG43]], <4 x float> [[REG47]])
// CHECK-NEXT: ret <4 x float> [[REG48]]

// CHECK: define available_externally <2 x double> @_mm_hsub_pd(<2 x double> [[REG49:[0-9a-zA-Z_%.]+]], <2 x double> [[REG50:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG49]], <2 x double>* [[REG51:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG50]], <2 x double>* [[REG52:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG51]], align 16
// CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG52]], align 16
// CHECK-NEXT: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])(<2 x double> [[REG53]], <2 x double> [[REG54]])
// CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG51]], align 16
// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG52]], align 16
// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> [[REG56]], <2 x double> [[REG57]])
// CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sub(double vector[2], double vector[2])(<2 x double> [[REG55]], <2 x double> [[REG58]])
// CHECK-NEXT: ret <2 x double> [[REG59]]

// CHECK: define available_externally <4 x float> @_mm_hsub_ps(<4 x float> [[REG60:[0-9a-zA-Z_%.]+]], <4 x float> [[REG61:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG60]], <4 x float>* [[REG62:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x float> [[REG61]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG64:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG62]], align 16
// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
// CHECK-NEXT: [[REG68:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG64]], align 16
// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG66]], <4 x float> [[REG67]], <16 x i8> [[REG68]])
// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG62]], align 16
// CHECK-NEXT: [[REG71:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
// CHECK-NEXT: [[REG72:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16
// CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG70]], <4 x float> [[REG71]], <16 x i8> [[REG72]])
// CHECK-NEXT: [[REG74:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sub(float vector[4], float vector[4])(<4 x float> [[REG69]], <4 x float> [[REG73]])
// CHECK-NEXT: ret <4 x float> [[REG74]]

// CHECK: define available_externally <2 x i64> @_mm_lddqu_si128(<2 x i64>* [[REG75:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64>* [[REG75]], <2 x i64>** [[REG76:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG77:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG76]], align 8
// CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG77]] to i32*
// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vsx_ld(int, int const*)(i32 signext 0, i32* [[REG78]])
// CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG79]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG80]]

// CHECK: define available_externally <2 x double> @_mm_loaddup_pd(double* [[REG81:[0-9a-zA-Z_%.]+]])
// CHECK: store double* [[REG81]], double** [[REG82:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG83:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG82]], align 8
// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load double, double* [[REG83]], align 8
// CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double [[REG84]])
// CHECK-NEXT: ret <2 x double> [[REG85]]

// CHECK: define available_externally <2 x double> @_mm_movedup_pd(<2 x double> [[REG86:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG86]], <2 x double>* [[REG87:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG87]], align 16
// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG87]], align 16
// CHECK-NEXT: [[REG90:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_shuffle_pd(<2 x double> [[REG88]], <2 x double> [[REG89]], i32 signext 0)
// CHECK-NEXT: ret <2 x double> [[REG90]]

// CHECK: define available_externally <4 x float> @_mm_movehdup_ps(<4 x float> [[REG91:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG93]] to <4 x i32>
// CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
// CHECK-NEXT: [[REG96:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG95]] to <4 x i32>
// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG94]], <4 x i32> [[REG96]])
// CHECK-NEXT: [[REG98:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG97]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[REG98]]

// CHECK: define available_externally <4 x float> @_mm_moveldup_ps(<4 x float> [[REG99:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG99]], <4 x float>* [[REG100:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG100]], align 16
// CHECK-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG101]] to <4 x i32>
// CHECK-NEXT: [[REG103:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG100]], align 16
// CHECK-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG103]] to <4 x i32>
// CHECK-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergee(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG102]], <4 x i32> [[REG104]])
// CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG105]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[REG106]]
